import sys
import os
sys.path.insert(0, os.path.abspath("../"))
sys.path.insert(0, os.path.abspath("../nsforest/nsforesting"))
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import nsforest as ns
from nsforest import utils
from nsforest import preprocessing as pp
from nsforest import nsforesting
from nsforest import evaluating
from nsforest import plotting as pl
from IPython.display import Image
# Loading h5ad
data_folder = "../demo_data/"
file = data_folder + "adata_layer1.h5ad"
adata = sc.read_h5ad(file)
adata
AnnData object with n_obs × n_vars = 871 × 16497
obs: 'cluster'
cluster_header = "cluster"
# Looking at sample labels (obs_names)
adata.obs_names
Index(['A01_1_Nuclei_NeuNP_H200_1025_MTG_layer1_BCH9',
'A01_BCH3_1NeuNP_H200.1030_MTG_Layer_1',
'A02_BCH1_1NeuNP_H200.1025_MTG_layer_1',
'A03_1_Nuclei_NeuNP_H200_1025_MTG_layer1_BCH9',
'A04_1_Nuclei_NeuNP_H200_1025_MTG_layer1_BCH9',
'A04_BCH1_1NeuNP_H200.1025_MTG_layer_1',
'A04_BCH3_1NeuNP_H200.1030_MTG_Layer_1',
'A05_1_Nuclei_NeuNP_H200_1025_MTG_layer1_BCH9',
'A05_BCH1_1NeuNP_H200.1025_MTG_layer_1',
'A05_BCH3_1NeuNP_H200.1030_MTG_Layer_1',
...
'P09_1_Nuclei_NeuNN_H200_1025_MTG_layer1_BCH7',
'P09_1_Nuclei_NeuNN_H200_1025_MTG_layer1_BCH9',
'P09_1_Nuclei_NeuNN_H200_1030_MTG_layer1_BCH8',
'P09_BCH1_1NeuNN_H200.1025_MTG_layer_1',
'P10_1_Nuclei_NeuNN_H200_1025_MTG_layer1_BCH6',
'P10_1_Nuclei_NeuNN_H200_1025_MTG_layer1_BCH9',
'P10_BCH1_1NeuNN_H200.1025_MTG_layer_1',
'P11_1_Nuclei_NeuNN_H200_1025_MTG_layer1_BCH7',
'P11_1_Nuclei_NeuNN_H200_1025_MTG_layer1_BCH9',
'P11_1_Nuclei_NeuNN_H200_1030_MTG_layer1_BCH8'],
dtype='object', length=871)
# Looking at genes (var_names)
# Note: you can change the var.index to be ensembl id or gene name
# adata.var_names = adata.var["feature_name"]
adata.var_names
Index(['A1CF', 'A2M', 'A2M_AS1', 'A2ML1', 'A2ML1_AS1', 'A2MP1', 'A3GALT2',
'A4GALT', 'AAAS', 'AACS',
...
'ZUFSP', 'ZW10', 'ZWILCH', 'ZWINT', 'ZXDC', 'ZYG11A', 'ZYG11B', 'ZYX',
'ZZEF1', 'ZZZ3'],
dtype='object', length=16497)
# Checking cell annotation sizes
adata.obs[cluster_header].value_counts()
e1_e299_SLC17A7_L5b_Cdh13 299 i1_i90_COL5A2_Ndnf_Car4 90 i2_i77_LHX6_Sst_Cbln4 77 i3_i56_BAGE2_Ndnf_Cxcl14 56 i4_i54_MC4R_Ndnf_Cxcl14 54 g1_g48_GLI3_Astro_Gja1 48 i5_i47_TRPC3_Ndnf_Car4 47 i6_i44_GPR149_Vip_Mybpc1 44 i7_i31_CLMP_Ndnf_Cxcl14 31 g2_g27_APBB1IP_Micro_Ctss 27 i8_i27_SNCG_Vip_Mybpc1 27 i9_i22_TAC3_Vip_Mybpc1 22 g3_g18_GPNMB_OPC_Pdgfra 18 i10_i16_TSPAN12_Vip_Mybpc1 16 g4_g9_MOG_Oligo_Opalin 9 i11_i6_EGF_Vip_Mybpc1 6 Name: cluster, dtype: int64
cluster_header = "cluster"
medians_header = "medians_" + cluster_header
binary_scores_header = "binary_scores_" + cluster_header
# Running scanpy dendrogram
ns.pp.dendrogram(adata, cluster_header, save = "_" + cluster_header + ".png")
WARNING: dendrogram data not found (using key=dendrogram_cluster). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
WARNING: You’re trying to run this on 16497 dimensions of `.X`, if you really want this, set `use_rep='X'`.
Falling back to preprocessing with `sc.pp.pca` and default params.
C:\Users\bpeng\AppData\Local\anaconda3\Lib\site-packages\anndata\_core\anndata.py:522: FutureWarning: The dtype argument is deprecated and will be removed in late 2024. warnings.warn(
WARNING: saving figure to file figures\dendrogram_cluster.png
AnnData object with n_obs × n_vars = 871 × 16497
obs: 'cluster'
uns: 'dendrogram_cluster'
obsm: 'X_pca'
# Calculating cluster medians per gene
ns.pp.prep_medians(adata, cluster_header, use_mean = False, positive_genes_only = True)
Calculating medians...
Calculating medians (means) per cluster: 100%|█████████████████████████████████████████| 16/16 [00:00<00:00, 55.69it/s]
Saving calculated medians as adata.varm.medians_cluster --- 0.29226064682006836 seconds --- Only positive genes selected. 11688 positive genes out of 16497 total genes
AnnData object with n_obs × n_vars = 871 × 11688
obs: 'cluster'
uns: 'dendrogram_cluster'
obsm: 'X_pca'
varm: 'medians_cluster'
# Calculating binary scores per gene per cluster
ns.pp.prep_binary_scores(adata, cluster_header, medians_header)
adata
Calculating binary scores...
Calculating binary scores per cluster: 100%|███████████████████████████████████████████| 16/16 [01:18<00:00, 4.90s/it]
Saving calculated binary scores as adata.varm.binary_scores_cluster --- 78.71559691429138 seconds --- median: 0.0 mean: 0.1428629848824932 std: 0.23137174460615756
AnnData object with n_obs × n_vars = 871 × 16497
obs: 'cluster'
uns: 'dendrogram_cluster'
obsm: 'X_pca'
varm: 'medians_cluster', 'binary_scores_cluster'
a = plt.hist(adata.varm[medians_header].unstack(), bins = 100)
a = plt.title(f'{file.split("/")[-1].replace(".h5ad", "")}: {medians_header} histogram')
a = plt.xlabel(medians_header)
a = plt.yscale("log")
plt.show()
a = plt.hist(adata.varm[binary_scores_header].unstack(), bins = 100)
a = plt.title(f'{file.split("/")[-1].replace(".h5ad", "")}: {binary_scores_header} histogram')
a = plt.xlabel(binary_scores_header)
a = plt.yscale("log")
plt.show()
# Saving as new h5ad
filename = file.replace(".h5ad", "_preprocessed.h5ad")
print(f"Saving new anndata object as...\n{filename}")
adata.write_h5ad(filename)
Saving new anndata object as... ../demo_data/adata_layer1_preprocessed.h5ad
outputfilename_prefix = cluster_header
output_folder = "../outputs_layer1/"
# Adding this line to actually subset to positive genes if running NSForest
adata = ns.pp.prep_medians(adata, cluster_header, use_mean = True, positive_genes_only = True)
results = nsforesting.NSForest(adata, cluster_header, medians_header, binary_scores_header,
output_folder = output_folder, outputfilename_prefix = outputfilename_prefix)
Calculating medians... use_mean is True. Using the mean expression of each gene per cluster.
Calculating medians (means) per cluster: 100%|████████████████████████████████████████| 16/16 [00:00<00:00, 102.18it/s]
Saving calculated medians as adata.varm.medians_cluster --- 0.16356277465820312 seconds --- Only positive genes selected. 16419 positive genes out of 16497 total genes
Preparing adata... --- 0.002991914749145508 seconds --- Pre-selecting genes based on binary scores... Threshold (mean + 2 * std): 0.6069627661813182 Average number of genes after gene_selection in each cluster: 1077.125 Saving number of genes selected per cluster as... ../outputs_layer1/cluster_gene_selection.csv Number of clusters to evaluate: 16
running NSForest on all clusters: 0%| | 0/16 [00:00<?, ?it/s]
1 out of 16: e1_e299_SLC17A7_L5b_Cdh13 Pre-selected 2029 genes to feed into Random Forest.
running NSForest on all clusters: 6%|███ | 1/16 [00:07<01:53, 7.59s/it]
['TESPA1', 'SATB2'] fbeta: 0.9850034083162916 2 out of 16: g1_g48_GLI3_Astro_Gja1 Pre-selected 727 genes to feed into Random Forest.
running NSForest on all clusters: 12%|██████▏ | 2/16 [00:10<01:10, 5.04s/it]
['LINC00498'] fbeta: 0.9500000000000001 3 out of 16: g2_g27_APBB1IP_Micro_Ctss Pre-selected 485 genes to feed into Random Forest.
running NSForest on all clusters: 19%|█████████▏ | 3/16 [00:13<00:52, 4.04s/it]
['CSF2RA', 'ADAM28'] fbeta: 0.9756097560975611 4 out of 16: g3_g18_GPNMB_OPC_Pdgfra Pre-selected 453 genes to feed into Random Forest.
running NSForest on all clusters: 25%|████████████▎ | 4/16 [00:16<00:43, 3.63s/it]
['GPNMB', 'OLIG2'] fbeta: 0.8620689655172413 5 out of 16: g4_g9_MOG_Oligo_Opalin Pre-selected 693 genes to feed into Random Forest.
running NSForest on all clusters: 31%|███████████████▎ | 5/16 [00:20<00:38, 3.53s/it]
['ST18'] fbeta: 1.0 6 out of 16: i10_i16_TSPAN12_Vip_Mybpc1 Pre-selected 1445 genes to feed into Random Forest.
running NSForest on all clusters: 38%|██████████████████▍ | 6/16 [00:24<00:38, 3.89s/it]
['TSPAN12', 'CHRNB3'] fbeta: 0.8035714285714286 7 out of 16: i11_i6_EGF_Vip_Mybpc1 Pre-selected 2393 genes to feed into Random Forest.
running NSForest on all clusters: 44%|█████████████████████▍ | 7/16 [00:28<00:34, 3.88s/it]
['EGF', 'NRG3_AS1'] fbeta: 0.7142857142857143 8 out of 16: i1_i90_COL5A2_Ndnf_Car4 Pre-selected 477 genes to feed into Random Forest.
running NSForest on all clusters: 50%|████████████████████████▌ | 8/16 [00:32<00:31, 3.89s/it]
['COL5A2', 'FREM1'] fbeta: 0.8870967741935484 9 out of 16: i2_i77_LHX6_Sst_Cbln4 Pre-selected 520 genes to feed into Random Forest.
running NSForest on all clusters: 56%|███████████████████████████▌ | 9/16 [00:37<00:28, 4.13s/it]
['LHX6'] fbeta: 0.8166189111747851 10 out of 16: i3_i56_BAGE2_Ndnf_Cxcl14 Pre-selected 308 genes to feed into Random Forest.
running NSForest on all clusters: 62%|██████████████████████████████ | 10/16 [00:40<00:24, 4.06s/it]
['BAGE2', 'SYT10'] fbeta: 0.78125 11 out of 16: i4_i54_MC4R_Ndnf_Cxcl14 Pre-selected 408 genes to feed into Random Forest.
running NSForest on all clusters: 69%|█████████████████████████████████ | 11/16 [00:44<00:19, 3.89s/it]
['ARHGAP36', 'ADAM33', 'FAM19A4'] fbeta: 0.8707865168539326 12 out of 16: i5_i47_TRPC3_Ndnf_Car4 Pre-selected 1519 genes to feed into Random Forest.
running NSForest on all clusters: 75%|████████████████████████████████████ | 12/16 [00:50<00:17, 4.41s/it]
['NTNG1', 'EYA4'] fbeta: 0.9064327485380117 13 out of 16: i6_i44_GPR149_Vip_Mybpc1 Pre-selected 670 genes to feed into Random Forest.
running NSForest on all clusters: 81%|███████████████████████████████████████ | 13/16 [00:54<00:13, 4.44s/it]
['FLT1', 'GPR149'] fbeta: 0.7916666666666666 14 out of 16: i7_i31_CLMP_Ndnf_Cxcl14 Pre-selected 1599 genes to feed into Random Forest.
running NSForest on all clusters: 88%|██████████████████████████████████████████ | 14/16 [00:59<00:09, 4.52s/it]
['PAX6', 'TGFBR2'] fbeta: 0.9009009009009009 15 out of 16: i8_i27_SNCG_Vip_Mybpc1 Pre-selected 2031 genes to feed into Random Forest.
running NSForest on all clusters: 94%|█████████████████████████████████████████████ | 15/16 [01:04<00:04, 4.80s/it]
['SNCG', 'EDNRA'] fbeta: 0.759493670886076 16 out of 16: i9_i22_TAC3_Vip_Mybpc1 Pre-selected 1477 genes to feed into Random Forest.
running NSForest on all clusters: 100%|████████████████████████████████████████████████| 16/16 [01:09<00:00, 4.36s/it]
['MCTP2', 'VWC2L_IT1'] fbeta: 0.7051282051282052 Saving supplementary table as... ../outputs_layer1/cluster_supplementary.csv Saving markers table as... ../outputs_layer1/cluster_markers.csv Saving results table as... ../outputs_layer1/cluster_results.csv using median
Calculating medians (means) per cluster: 100%|████████████████████████████████████████| 16/16 [00:00<00:00, 356.51it/s]
Saving final results table as... ../outputs_layer1/cluster_results.csv --- 69.85787081718445 seconds ---
results
| clusterName | clusterSize | f_score | PPV | TN | FP | FN | TP | marker_count | NSForest_markers | binary_genes | onTarget | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | e1_e299_SLC17A7_L5b_Cdh13 | 299 | 0.985003 | 0.989726 | 569 | 3 | 10 | 289 | 2 | [TESPA1, SATB2] | [SLC17A7, TBR1, LINC00507, KIAA1211L, TESPA1, ... | 0.421602 |
| 1 | g1_g48_GLI3_Astro_Gja1 | 48 | 0.950000 | 1.000000 | 823 | 0 | 10 | 38 | 1 | [LINC00498] | [LINC00498, FGFR3, EMX2OS, SLC7A11, F3, RANBP3... | 1.000000 |
| 2 | g2_g27_APBB1IP_Micro_Ctss | 27 | 0.975610 | 1.000000 | 844 | 0 | 3 | 24 | 2 | [CSF2RA, ADAM28] | [INPP5D, PLCG2, CSF2RA, ADAM28, FLI1, CD74, SY... | 1.000000 |
| 3 | g3_g18_GPNMB_OPC_Pdgfra | 18 | 0.862069 | 1.000000 | 853 | 0 | 8 | 10 | 2 | [GPNMB, OLIG2] | [GPNMB, STK32A, COL20A1, OLIG2, KLRC3, B3GNT7,... | 1.000000 |
| 4 | g4_g9_MOG_Oligo_Opalin | 9 | 1.000000 | 1.000000 | 862 | 0 | 0 | 9 | 1 | [ST18] | [ST18, MOBP, CNDP1, FOLH1, MOG, CD22, TF, CERC... | 1.000000 |
| 5 | i10_i16_TSPAN12_Vip_Mybpc1 | 16 | 0.803571 | 0.900000 | 854 | 1 | 7 | 9 | 2 | [TSPAN12, CHRNB3] | [TSPAN12, LINC01539, TMC5, CHRNB3, FAM46A, ANG... | 0.783762 |
| 6 | i11_i6_EGF_Vip_Mybpc1 | 6 | 0.714286 | 1.000000 | 865 | 0 | 4 | 2 | 2 | [EGF, NRG3_AS1] | [KCNJ2_AS1, EGF, ZSCAN23, TEKT1, STK11IP, NRG3... | 0.892726 |
| 7 | i1_i90_COL5A2_Ndnf_Car4 | 90 | 0.887097 | 1.000000 | 781 | 0 | 35 | 55 | 2 | [COL5A2, FREM1] | [NMBR, COL5A2, SST, NPNT, NDNF, FREM1, FAT1, G... | 0.556021 |
| 8 | i2_i77_LHX6_Sst_Cbln4 | 77 | 0.816619 | 0.838235 | 783 | 11 | 20 | 57 | 1 | [LHX6] | [LHX6, FLT3, TAC1, CALB1, TRBC2, RSPO3, GRIK3,... | 1.000000 |
| 9 | i3_i56_BAGE2_Ndnf_Cxcl14 | 56 | 0.781250 | 0.961538 | 814 | 1 | 31 | 25 | 2 | [BAGE2, SYT10] | [BAGE2, SCN5A, GREM2, SYT10, ARHGAP18, GRB14, ... | 0.602383 |
| 10 | i4_i54_MC4R_Ndnf_Cxcl14 | 54 | 0.870787 | 1.000000 | 817 | 0 | 23 | 31 | 3 | [ARHGAP36, ADAM33, FAM19A4] | [ARHGAP36, MC4R, COBLL1, LINC01435, ADAM33, FA... | 0.420466 |
| 11 | i5_i47_TRPC3_Ndnf_Car4 | 47 | 0.906433 | 1.000000 | 824 | 0 | 16 | 31 | 2 | [NTNG1, EYA4] | [SSTR2, KIRREL, NTNG1, TARID, EYA4, CA2, TACR1... | 0.380471 |
| 12 | i6_i44_GPR149_Vip_Mybpc1 | 44 | 0.791667 | 1.000000 | 827 | 0 | 25 | 19 | 2 | [FLT1, GPR149] | [PLCE1_AS1, FLT1, CXCL12, SLC22A3, GPR149, SHI... | 0.811541 |
| 13 | i7_i31_CLMP_Ndnf_Cxcl14 | 31 | 0.900901 | 1.000000 | 840 | 0 | 11 | 20 | 2 | [PAX6, TGFBR2] | [FGF10, KIAA1644, CLMP, PAX6, SP8, TGFBR2, WIF... | 0.547845 |
| 14 | i8_i27_SNCG_Vip_Mybpc1 | 27 | 0.759494 | 0.923077 | 843 | 1 | 15 | 12 | 2 | [SNCG, EDNRA] | [SNCG, MMRN2, EDNRA, FBN3, KCNK2, SCML4, SAMD5... | 1.000000 |
| 15 | i9_i22_TAC3_Vip_Mybpc1 | 22 | 0.705128 | 0.785714 | 846 | 3 | 11 | 11 | 2 | [MCTP2, VWC2L_IT1] | [BSPRY, MCTP2, OFD1P10Y, OFD1P15Y, OFD1P9Y, VW... | 0.883928 |
# Getting dictionary of markers (format: {cluster: marker_list})
markers = pd.read_csv("../outputs_layer1/cluster_results.csv")
markers_dict = utils.prepare_markers(markers, "clusterName", "NSForest_markers")
outputfilename_prefix = "marker_eval"
results = evaluating.DecisionTree(adata, cluster_header, medians_header, markers_dict, combinations = False, use_mean = False,
output_folder = output_folder, outputfilename_prefix = outputfilename_prefix)
Preparing data... --- 0.00398707389831543 seconds --- Number of clusters to evaluate: 16 1 out of 16: e1_e299_SLC17A7_L5b_Cdh13 marker genes to be evaluated: ['TESPA1', 'SATB2'] ['TESPA1', 'SATB2'] f-beta:0.9850034083162916 PPV:0.9897260273972602 2 out of 16: g1_g48_GLI3_Astro_Gja1 marker genes to be evaluated: ['LINC00498'] ['LINC00498'] f-beta:0.9500000000000001 PPV:1.0 3 out of 16: g2_g27_APBB1IP_Micro_Ctss marker genes to be evaluated: ['CSF2RA', 'ADAM28'] ['CSF2RA', 'ADAM28'] f-beta:0.9756097560975611 PPV:1.0 4 out of 16: g3_g18_GPNMB_OPC_Pdgfra marker genes to be evaluated: ['GPNMB', 'OLIG2'] ['GPNMB', 'OLIG2'] f-beta:0.8620689655172413 PPV:1.0 5 out of 16: g4_g9_MOG_Oligo_Opalin marker genes to be evaluated: ['ST18'] ['ST18'] f-beta:1.0 PPV:1.0 6 out of 16: i10_i16_TSPAN12_Vip_Mybpc1 marker genes to be evaluated: ['TSPAN12', 'CHRNB3'] ['TSPAN12', 'CHRNB3'] f-beta:0.8035714285714286 PPV:0.9 7 out of 16: i11_i6_EGF_Vip_Mybpc1 marker genes to be evaluated: ['EGF', 'NRG3_AS1'] ['EGF', 'NRG3_AS1'] f-beta:0.7142857142857143 PPV:1.0 8 out of 16: i1_i90_COL5A2_Ndnf_Car4 marker genes to be evaluated: ['COL5A2', 'FREM1'] ['COL5A2', 'FREM1'] f-beta:0.8870967741935484 PPV:1.0 9 out of 16: i2_i77_LHX6_Sst_Cbln4 marker genes to be evaluated: ['LHX6'] ['LHX6'] f-beta:0.8166189111747851 PPV:0.8382352941176471 10 out of 16: i3_i56_BAGE2_Ndnf_Cxcl14 marker genes to be evaluated: ['BAGE2', 'SYT10'] ['BAGE2', 'SYT10'] f-beta:0.78125 PPV:0.9615384615384616 11 out of 16: i4_i54_MC4R_Ndnf_Cxcl14 marker genes to be evaluated: ['ARHGAP36', 'ADAM33', 'FAM19A4'] ['ARHGAP36', 'ADAM33', 'FAM19A4'] f-beta:0.8707865168539326 PPV:1.0 12 out of 16: i5_i47_TRPC3_Ndnf_Car4 marker genes to be evaluated: ['NTNG1', 'EYA4'] ['NTNG1', 'EYA4'] f-beta:0.9064327485380117 PPV:1.0 13 out of 16: i6_i44_GPR149_Vip_Mybpc1 marker genes to be evaluated: ['FLT1', 'GPR149'] ['FLT1', 'GPR149'] f-beta:0.7916666666666666 PPV:1.0 14 out of 16: i7_i31_CLMP_Ndnf_Cxcl14 marker genes to be evaluated: ['PAX6', 'TGFBR2'] ['PAX6', 'TGFBR2'] f-beta:0.9009009009009009 PPV:1.0 15 out of 16: i8_i27_SNCG_Vip_Mybpc1 marker genes to be evaluated: ['SNCG', 'EDNRA'] ['SNCG', 'EDNRA'] f-beta:0.759493670886076 PPV:0.9230769230769231 16 out of 16: i9_i22_TAC3_Vip_Mybpc1 marker genes to be evaluated: ['MCTP2', 'VWC2L_IT1'] ['MCTP2', 'VWC2L_IT1'] f-beta:0.7051282051282052 PPV:0.7857142857142857 using median
Calculating medians (means) per cluster: 100%|████████████████████████████████████████| 16/16 [00:00<00:00, 485.67it/s]
Saving final results table as... ../outputs_layer1/marker_eval_results.csv --- 0.5076773166656494 seconds ---
results
| clusterName | clusterSize | f_score | PPV | TN | FP | FN | TP | marker_count | markers | onTarget | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | e1_e299_SLC17A7_L5b_Cdh13 | 299 | 0.985003 | 0.989726 | 569 | 3 | 10 | 289 | 2 | [TESPA1, SATB2] | 0.421602 |
| 1 | g1_g48_GLI3_Astro_Gja1 | 48 | 0.950000 | 1.000000 | 823 | 0 | 10 | 38 | 1 | [LINC00498] | 1.000000 |
| 2 | g2_g27_APBB1IP_Micro_Ctss | 27 | 0.975610 | 1.000000 | 844 | 0 | 3 | 24 | 2 | [CSF2RA, ADAM28] | 1.000000 |
| 3 | g3_g18_GPNMB_OPC_Pdgfra | 18 | 0.862069 | 1.000000 | 853 | 0 | 8 | 10 | 2 | [GPNMB, OLIG2] | 1.000000 |
| 4 | g4_g9_MOG_Oligo_Opalin | 9 | 1.000000 | 1.000000 | 862 | 0 | 0 | 9 | 1 | [ST18] | 1.000000 |
| 5 | i10_i16_TSPAN12_Vip_Mybpc1 | 16 | 0.803571 | 0.900000 | 854 | 1 | 7 | 9 | 2 | [TSPAN12, CHRNB3] | 0.783762 |
| 6 | i11_i6_EGF_Vip_Mybpc1 | 6 | 0.714286 | 1.000000 | 865 | 0 | 4 | 2 | 2 | [EGF, NRG3_AS1] | 0.892726 |
| 7 | i1_i90_COL5A2_Ndnf_Car4 | 90 | 0.887097 | 1.000000 | 781 | 0 | 35 | 55 | 2 | [COL5A2, FREM1] | 0.556021 |
| 8 | i2_i77_LHX6_Sst_Cbln4 | 77 | 0.816619 | 0.838235 | 783 | 11 | 20 | 57 | 1 | [LHX6] | 1.000000 |
| 9 | i3_i56_BAGE2_Ndnf_Cxcl14 | 56 | 0.781250 | 0.961538 | 814 | 1 | 31 | 25 | 2 | [BAGE2, SYT10] | 0.602383 |
| 10 | i4_i54_MC4R_Ndnf_Cxcl14 | 54 | 0.870787 | 1.000000 | 817 | 0 | 23 | 31 | 3 | [ARHGAP36, ADAM33, FAM19A4] | 0.420466 |
| 11 | i5_i47_TRPC3_Ndnf_Car4 | 47 | 0.906433 | 1.000000 | 824 | 0 | 16 | 31 | 2 | [NTNG1, EYA4] | 0.380471 |
| 12 | i6_i44_GPR149_Vip_Mybpc1 | 44 | 0.791667 | 1.000000 | 827 | 0 | 25 | 19 | 2 | [FLT1, GPR149] | 0.811541 |
| 13 | i7_i31_CLMP_Ndnf_Cxcl14 | 31 | 0.900901 | 1.000000 | 840 | 0 | 11 | 20 | 2 | [PAX6, TGFBR2] | 0.547845 |
| 14 | i8_i27_SNCG_Vip_Mybpc1 | 27 | 0.759494 | 0.923077 | 843 | 1 | 15 | 12 | 2 | [SNCG, EDNRA] | 1.000000 |
| 15 | i9_i22_TAC3_Vip_Mybpc1 | 22 | 0.705128 | 0.785714 | 846 | 3 | 11 | 11 | 2 | [MCTP2, VWC2L_IT1] | 0.883928 |
ns.pl.boxplot_fscore(results, False, output_folder, outputfilename_prefix)
ns.pl.boxplot_ppv(results, False, output_folder, outputfilename_prefix)
ns.pl.boxplot_ontarget(results, False, output_folder, outputfilename_prefix)
ns.pl.scatter_w_clusterSize_fscore(results, False, output_folder, outputfilename_prefix)
ns.pl.scatter_w_clusterSize_ppv(results, False, output_folder, outputfilename_prefix)
ns.pl.scatter_w_clusterSize_ontarget(results, False, output_folder, outputfilename_prefix)
# If you want to specify dendrogram order
dendrogram = list(adata.uns["dendrogram_" + cluster_header]["categories_ordered"])
if dendrogram: # https://stackoverflow.com/questions/23482668/sorting-by-a-custom-list-in-pandas
results["clusterName"] = results["clusterName"].astype("category")
results["clusterName"] = results["clusterName"].cat.set_categories(dendrogram)
results = results.sort_values("clusterName")
if "markers" in list(results.columns):
markers_dict = dict(zip(results["clusterName"], results["markers"]))
elif "NSForest_markers" in list(results.columns):
markers_dict = dict(zip(results["clusterName"], results["NSForest_markers"]))
ns.pl.dotplot(adata, markers_dict, cluster_header, dendrogram, True, output_folder, outputfilename_prefix)
Saving... marker_eval.png WARNING: saving figure to file figures\dotplot_marker_eval.png
C:\Users\bpeng\AppData\Local\anaconda3\Lib\site-packages\scanpy\plotting\_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
ns.pl.stackedviolin(adata, markers_dict, cluster_header, dendrogram, True, output_folder, outputfilename_prefix)
Saving... marker_eval.png WARNING: saving figure to file figures\stacked_violin_marker_eval.png
ns.pl.matrixplot(adata, markers_dict, cluster_header, dendrogram, True, output_folder, outputfilename_prefix)
Saving... marker_eval.png WARNING: saving figure to file figures\matrixplot_marker_eval.png